/*
Creates incumbent panel from worker-year panel 
		// input: workeryear_panel, estabtreat_forRAIS_deid, microregions_municipality_concordance
		// output: incumbent_panel

*/

* Start a fresh log. Both commands are wrapped in -capture-, so a failure to
* close or open a log does not stop the script.
capture log close
capture log using "$logs/create_incumbent_sample", replace

* Load the worker-year panel and discard any newhire* / atblemp* variables
* it carries (atblemp is rebuilt further down in this script).
use "$files/rais/BR/workeryear_panel.dta", clear
drop newhire* atblemp*


*****************
* Attach the establishment-level treatment file and restrict the sample to
* incumbents: workers whose 2014 (baseline) row carries a non-missing value
* for every establishment flag listed below.
*****************

merge m:1 fakeid_estab using "$files/estabtreat_forRAIS_deid.dta"

* The same four steps are applied to each flag, in this order:
*   treat    -> treatwork    (treated or comparison establishments)
*   signing  -> signwork     (signing establishments)
*   covered  -> coverwork    (covered establishments)
*   singest  -> singestwork  (single-establishment firms)
*   both_FM  -> bothFMwork   (establishments employing both men and women)
local flagvars  treat signing covered singest both_FM
local flagstubs treat sign    cover   singest bothFM

forvalues k = 1/5 {
	local flag : word `k' of `flagvars'
	local stub : word `k' of `flagstubs'

	// value of the flag on the worker's 2014 row(s); missing in other years
	generate `stub'base = `flag' if year == 2014
	// copy that 2014 value onto every row of the worker
	bysort fakeid_worker: gegen `stub'work = max(`stub'base)
	// workers with no non-missing 2014 value leave the sample
	drop if `stub'work == .
	drop `stub'base
}


***********
* Rectangularize the panel for the retained incumbent workers
***********

xtset fakeid_worker year
tsfill, full

* Rows created by tsfill have missing values for every non-panel variable.
* Re-populate the worker-level flags below so that they are constant within
* worker across all years. (signwork and coverwork are not touched in this step.)
tempvar workerval
foreach flag of varlist treatwork singestwork bothFMwork {
	bysort fakeid_worker: gegen `workerval' = max(`flag')
	replace `flag' = `workerval'
	drop `workerval'
}


********
* Baseline (2014) characteristics
* Pattern used throughout: take the value on the 2014 row, then copy it to
* all rows of the same worker with gegen max().
********

* Occupation at baseline
generate blocc1 = occ if year == 2014
gegen blocc = max(blocc1), by(fakeid_worker)
label variable blocc "Baseline occupation"

* Tenure at baseline
generate bl_ten = ten if year == 2014
gegen blten = max(bl_ten), by(fakeid_worker)
label variable blten "Baseline tenure"

* Gender at baseline: 1 if gender==2 on the 2014 row, 0 in every other case
* (including a missing 2014 gender). The original time-varying variable is
* kept under the name gender_contemp; the baseline version takes over the
* name gender.
generate fem = 1 if year == 2014 & gender == 2
gegen gender_bl = max(fem), by(fakeid_worker)
replace gender_bl = 0 if gender_bl != 1
label variable gender_bl "Gender at baseline"
rename gender gender_contemp
rename gender_bl gender
label variable gender_contemp "Contemporaneous gender"

* Age at baseline
generate bl_age = age if year == 2014
gegen blage = max(bl_age), by(fakeid_worker)
label variable blage "Baseline age"

* Childbearing age at baseline: 1 if baseline age is in [20, 35], 0 if above
* 35, missing if baseline age is missing or below 20
generate childbearing = inrange(blage, 20, 35) if blage != . & blage >= 20

* Microregion at baseline
* The concordance is keyed on -municipality-, so municipality_mode is renamed
* for the merge and renamed back afterwards. Concordance rows that match no
* observation in the panel are not kept.
capture drop _merge
rename municipality_mode municipality
merge m:1 municipality using "$raw/microregions_municipality_concordance.dta", ///
	keepusing(microregion) keep(master match) nogenerate
rename municipality municipality_mode
rename microregion microregion_mode
generate bl_mr = microregion_mode if year == 2014
gegen blmr = max(bl_mr), by(fakeid_worker)
label variable blmr "Baseline microregion"

* State at baseline
* state_mode is the municipality code divided by 10,000 and rounded down;
* stgrp renumbers the distinct state_mode values as consecutive integers.
generate state_mode = floor(municipality_mode / 1e4)
egen stgrp = group(state_mode)
generate blstate1 = stgrp if year == 2014
gegen blstate = max(blstate1), by(fakeid_worker)
label variable blstate "Baseline state"

* Industry at baseline
generate bl_ind = ind_mode if year == 2014
gegen blind = max(bl_ind), by(fakeid_worker)
label variable blind "Baseline industry"
drop bl_ind

* Employment at the baseline employer: total, women, men
generate blemp1 = emptot if year == 2014
generate blfem1 = empfem if year == 2014
generate blmal1 = empmal if year == 2014

foreach grp in emp fem mal {
	gegen bl`grp' = max(bl`grp'1), by(fakeid_worker)
}

label variable blemp "Size of baseline employer"
label variable blfem "Baseline no. of women at employer"
label variable blmal "Baseline no. of men at employer"

* Reciprocal of baseline employer size
generate blempinv = 1 / blemp
label variable blempinv "Inverse of employment at baseline establishment in 2014"


*****************************************************
* Flag rows where the worker is observed at the 2014 (baseline) establishment
*****************************************************

* Establishment id as a string, so that it can be passed to egen mode() and
* compared as text
tostring fakeid_estab, generate(fakeid_estab_str) format("%16.0g")

* bl_employer = the worker's establishment id on the 2014 row, copied to all
* of the worker's rows
tempvar estab2014
generate `estab2014' = fakeid_estab_str if year == 2014
capture drop bl_employer
egen bl_employer = mode(`estab2014'), by(fakeid_worker)
drop `estab2014'

* 1 when the current establishment id equals the baseline one, 0 otherwise
generate atblemp = fakeid_estab_str == bl_employer
label variable atblemp "Employee is at baseline employer"


********
* Quits
********

	* At a different employer than at baseline. Left missing on rows with no
	* establishment id, i.e. when the worker has exited the data (to
	* non-employment or informal employment).
	generate atdiffemployer = (fakeid_estab_str != bl_employer) if fakeid_estab != .
	label variable atdiffemployer "At different employer"

	* Exit from the data (unemployment, informality or NILF): no establishment id
	generate notindata = fakeid_estab == .
	label variable notindata "Not in data (unemployment/informality/NILF)"

	* Split "at different employer" by the current establishment's treat value.
	* Both indicators are missing wherever atdiffemployer is missing.
	*   non-CUT: treat is 0 or missing
	generate diffnonCUTemployer = (atdiffemployer == 1 & (treat == 0 | treat == .)) ///
		if inlist(atdiffemployer, 0, 1)
	label variable diffnonCUTemployer "At different non-CUT employer"

	*   CUT: treat is 1
	generate diffCUTemployer = (atdiffemployer == 1 & treat == 1) ///
		if inlist(atdiffemployer, 0, 1)
	label variable diffCUTemployer "At different CUT employer"

	* Perhaps easier to interpret: observed at a non-CUT employer. Defined as
	* 1 when the current establishment has treat equal to 0 or missing, 0 when
	* it has any other treat value, and missing when there is no establishment id.
	generate nonCUTemployer = (treat == 0 | treat == .) if fakeid_estab != .
	label variable nonCUTemployer "At non-CUT employer"

	
********
* Interaction dummies: treatwork x year indicator, one per calendar year
********

* Clear leftovers from a previous run. The two patterns are dropped in
* separate commands on purpose: -drop- with several patterns aborts without
* dropping anything when any one of them matches no variable, and -capture-
* would hide that, leaving stale yearj* variables that make -tabulate,
* generate()- fail.
capture drop inter*
capture drop yearj*

* yearj1, yearj2, ... : one indicator per distinct value of year, in
* ascending order
tabulate year, generate(yearj)

* interj`j' = treatwork x yearj`j', labelled with the calendar year it refers
* to. The years are read from the data instead of being hard-coded, so the
* loop bound and the labels always agree with the yearj* indicators created
* above (for a 2011-2017 panel this yields interj1-interj7 labelled 2011-2017).
quietly levelsof year, local(panel_years)
local j = 0
foreach yr of local panel_years {
	local ++j
	generate interj`j' = treatwork * yearj`j'
	label variable interj`j' "`yr'"
}

	* Constant zero, labelled with the baseline year
	capture generate zero = 0
	label variable zero "2014"

*******
* Numeric id of the baseline employer (for clustering SE)
*******
	egen blempl = group(bl_employer)
	label variable blempl "Baseline employer cluster"

********
* Final clean-up and save
********

* Make sure the worker-level flags are populated on every row of each worker,
* including the years added when the panel was filled in
tempvar workerval
foreach flag of varlist treatwork signwork coverwork singestwork bothFMwork {
	bysort fakeid_worker: gegen `workerval' = max(`flag')
	replace `flag' = `workerval'
	drop `workerval'
}

xtset fakeid_worker year

* 1 when the worker is at the baseline employer with a non-missing occupation
* that differs from the baseline occupation; 0 otherwise
generate promoteblemp = (atblemp == 1) & (occ != .) & (occ != blocc)

save "$files/incumbent_panel.dta", replace

capture log close
